package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
  * Word-count job written with the DataFrame API and meant to be launched via `spark-submit`.
  *
  * Notes for submitting to a cluster:
  *  1. Do not hard-code a master in the builder; it is supplied on the command line.
  *  2. Point the data paths at the target file system, either through the optional program
  *     arguments or by editing the defaults below.
  *
  * Example:
  * {{{
  * spark-submit --class com.shujia.spark.sql.Demo5Submit --master yarn --deploy-mode client spark-1.0.jar [inputPath [outputPath]]
  * }}}
  * (Older Spark releases also accepted `--master yarn-client`; that form is deprecated.)
  */
object Demo5Submit {

  /** Input path used when no first program argument is given. */
  private val DefaultInputPath: String = "/data/words.txt"

  /** Output path used when no second program argument is given. */
  private val DefaultOutputPath: String = "/data/dfcount"

  /**
    * Reads lines from the input path, splits each line on commas, counts the occurrences of
    * every word and writes the `(word, count)` pairs as tab-separated text to the output path,
    * overwriting any existing output.
    *
    * @param args optional overrides: `args(0)` is the input path, `args(1)` is the output path;
    *             whichever is missing falls back to the default declared above
    */
  def main(args: Array[String]): Unit = {

    val inputPath: String = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath: String = args.lift(1).getOrElse(DefaultOutputPath)

    // No .master(...) here on purpose: the master comes from spark-submit.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("submit")
      // A single shuffle partition keeps the aggregated result in one partition.
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    try {
      import spark.implicits._
      import org.apache.spark.sql.functions._

      // The file is read through the CSV source with a tab separator and a one-column schema,
      // so each record lands in the single STRING column `line`.
      val linesDF: DataFrame = spark
        .read
        .format("csv")
        .option("sep", "\t")
        .schema("line STRING")
        .load(inputPath)

      // One output row per comma-separated token, then a count per distinct token.
      val wordCountDF: DataFrame = linesDF
        .select(explode(split($"line", ",")) as "word")
        .groupBy($"word")
        .agg(count($"word") as "count")

      wordCountDF
        .write
        .format("csv")
        .option("sep", "\t")
        .mode(SaveMode.Overwrite)
        .save(outputPath)
    } finally {
      // Release the session's resources whether the job succeeded or failed.
      spark.stop()
    }
  }

}
